// Sample employee DataFrame built from in-memory tuples.
// Each tuple supplies values in the same order as the column names passed to toDF.
val empDF = {
  // NOTE: the trailing space in "JONES SMITH " is part of the original data and is kept as-is.
  val employeeRows = Seq(
    (7369, "RAJ SEKHAR", "CLERK", 7902, "17-Dec-80", 800, 20, 10),
    (7499, "SMITH ALLEN", "SALESMAN", 7698, "20-Feb-81", 1600, 300, 30),
    (7521, "JAMES WARD", "SALESMAN", 7698, "22-Feb-81", 1250, 500, 30),
    (7566, "JONES SMITH ", "MANAGER", 7839, "2-Apr-81", 2975, 0, 20),
    (7654, "MARTIN USHA", "SALESMAN", 7698, "28-Sep-81", 1250, 1400, 30),
    (7698, "JEMS BLAKE", "MANAGER", 7839, "1-May-81", 2850, 0, 30),
    (7782, "MARK CLARK", "MANAGER", 7839, "9-Jun-81", 2450, 0, 10),
    (7788, "TOM SCOTT", "ANALYST", 7566, "19-Apr-87", 3000, 0, 20),
    (7839, "RAJ KING", "PRESIDENT", 0, "17-Nov-81", 5000, 0, 10),
    (7844, "BOLT TURNER", "SALESMAN", 7698, "8-Sep-81", 1500, 0, 30),
    (7876, "YEND ADAMS", "CLERK", 7788, "23-May-87", 1100, 0, 20)
  )
  val columnNames = Seq("empno", "ename", "job", "mgr", "hiredate", "sal", "comm", "deptno")

  spark.createDataFrame(employeeRows).toDF(columnNames: _*)
}
DataFrame Schema before Split
// Print the schema of empDF (column names and types) before any column is split.
empDF.printSchema()
DataFrame Schema After Splitting the ename Column into First Name and Last Name
// Brings split and col into scope. The import must sit on its own line:
// fused with the following `val` it does not parse.
import org.apache.spark.sql.functions._

// Split "ename" on a single space: element 0 becomes FirstName and element 1 becomes LastName.
// Only these two derived columns are selected, so no further drop is needed
// (the previous drop("name") referred to a column that does not exist and had no effect).
val empDF1 = empDF.select(
  split(col("ename"), " ").getItem(0).as("FirstName"),
  split(col("ename"), " ").getItem(1).as("LastName")
)

// Print the schema of the two-column result.
empDF1.printSchema()
import org.apache.spark.sql.functions._
// Keep every original column, but replace "ename" with two derived columns:
// FirstName (element 0 of the space-split name) and LastName (element 1).
// "ename" is left out simply by not selecting it; the previous drop("name")
// named a column that does not exist and had no effect, so it is removed.
val empDF1 = empDF.select(
  col("empno"),
  split(col("ename"), " ").getItem(0).as("FirstName"),
  split(col("ename"), " ").getItem(1).as("LastName"),
  col("job"),
  col("mgr"),
  col("hiredate"),
  col("sal"),
  col("comm"),
  col("deptno")
)

// Print the schema of the result once (the duplicated call was redundant).
empDF1.printSchema()
// Split "ename" into FirstName/LastName and "hiredate" into Day/Month/Year.
// show() returns Unit, so it is called separately; chaining it onto the select
// would bind empDF1 to Unit instead of the DataFrame.
val empDF1 = {
  // Build each split expression once and reuse it for every extracted element.
  val nameParts = split(col("ename"), " ")
  val hireDateParts = split(col("hiredate"), "-")

  empDF.select(
    nameParts.getItem(0).as("FirstName"),
    nameParts.getItem(1).as("LastName"),
    hireDateParts.getItem(0).as("Day"),
    hireDateParts.getItem(1).as("Month"),
    hireDateParts.getItem(2).as("Year")
  )
}

// Display the resulting rows.
empDF1.show()
No comments:
Post a Comment